// Cleans the ACS race-by-age population file: builds state-by-year population counts, shares, and logs by race/ethnicity for ages 18-30.

// Project roots. These are machine-specific absolute paths; edit them when
// running on another computer.
global JT "/Users/juliaturner/Dropbox/Northwestern/JTresearch/minwage"
global data "/Users/juliaturner/Dropbox/Northwestern/JTresearch/minwage_ccs/data"

********************************************************************************
// Input : ${data}/build_acs_race_age_pop.dta (person-level records)
// Output: ${JT}/data/clean_acs_race_age_pop_state.dta, one row per
//         year x statefip, holding for the 18-30 population:
//           pop_1830_<group>    weighted population count
//           pct_<group>         share of pop_1830_total
//           lnpop_1830_<group>  natural log of the count
//         where <group> is white, black, hispanic, asian, native, other,
//         twoplus (plus pop_1830_total / lnpop_1830_total).
********************************************************************************
use "${data}/build_acs_race_age_pop.dta", clear

// Sample restrictions.
keep if year >= 2000 //we dont have yearly estimates before 2000, so we drop
drop if sample == 200004 //this drops the 2000 ACS sample (which is very sparse) in favor of the 2000 1% sample

// Identifiers, household weights and detailed codes are not needed below.
drop sample serial cbserial hhwt cluster strata gq pernum raced hispand

// Make Hispanic origin its own mutually exclusive "race" (code 20), overriding
// the reported race. inrange() is used instead of `hispan > 0` because Stata
// treats missing values as larger than any number, so `> 0` would also label
// missing hispan as Hispanic; it likewise excludes a "not reported" code 9
// (per the IPUMS HISPAN codebook -- confirm against this extract).
replace race = 20 if inrange(hispan, 1, 4)

// Keep only the age range we are interested in.
keep if age >= 18 & age <= 30

// Estimated population counts: sum of person weights within year x state x race.
collapse (sum) perwt, by(year statefip race)
rename perwt pop_1830
// Corrects for the two implied decimals in perwt.
// NOTE(review): this assumes perwt in the build file is still stored x100;
// confirm it was not already rescaled when the extract was read in.
replace pop_1830 = pop_1830/100

// One row per year x state, one column per race code (pop_1830<code>).
// NOTE(review): a state-year with no sampled persons of a given race comes out
// of the reshape as missing (not 0), so its pct_ and lnpop_ values below are
// missing as well.
reshape wide pop_1830, i(year statefip) j(race)

// Replace the numeric race-code suffixes with readable names.
rename (pop_18301 pop_18302 pop_18303) (pop_1830_white pop_1830_black pop_1830_native)
rename (pop_18304 pop_18305 pop_18306) (pop_1830_chinese pop_1830_japanese pop_1830_asianoth)
rename (pop_18307 pop_18308 pop_18309) (pop_1830_other pop_1830_two pop_1830_threeplus)
rename pop_183020 pop_1830_hispanic

// Pool the detailed Asian and multiracial categories, then drop the components
// so that they are not double counted in the total. rowtotal() treats missing
// components as 0.
egen pop_1830_asian = rowtotal(pop_1830_chinese pop_1830_japanese pop_1830_asianoth)
egen pop_1830_twoplus = rowtotal(pop_1830_two pop_1830_threeplus)
drop pop_1830_chinese pop_1830_japanese pop_1830_asianoth pop_1830_two pop_1830_threeplus

// Total 18-30 population = sum over the seven mutually exclusive groups.
// The groups are listed explicitly so the total cannot silently pick up any
// other variable that happens to start with pop_1830_.
egen pop_1830_total = rowtotal(pop_1830_white pop_1830_black pop_1830_hispanic pop_1830_asian pop_1830_native pop_1830_other pop_1830_twoplus)

// Population share and log population for each group.
// ln() of a zero or missing count is missing.
foreach x in white black hispanic asian native other twoplus {
	gen pct_`x' = pop_1830_`x'/pop_1830_total
	gen lnpop_1830_`x' = ln(pop_1830_`x')
}
gen lnpop_1830_total = ln(pop_1830_total)

save "${JT}/data/clean_acs_race_age_pop_state.dta", replace






 